In [ ]:
import pandas as pd
import numpy as np
import plotly.express as px
In [ ]:
# Read the Spotify 2023 CSV (decoded as Latin-1) and convert `streams` to numbers in one
# chained expression; errors='coerce' turns values that cannot be parsed into NaN.
all_data = pd.read_csv('spotify-2023.csv', encoding='latin-1').assign(
    streams=lambda frame: pd.to_numeric(frame['streams'], errors='coerce')
)
In [ ]:
# For each release year, pick the row with the highest stream count.
# `streams` may contain NaN (it is coerced to numeric when the data is loaded), so rows
# without a stream count are dropped before idxmax: a year whose rows are all NaN would
# otherwise produce a missing label and make the .loc lookup fail.
most_streamed_song_by_year = all_data.loc[
    all_data.dropna(subset=['streams'])
    .groupby('released_year')['streams']
    .idxmax()
]
# Keep only the columns used by the chart below.
clean_data = most_streamed_song_by_year[['track_name', 'artist(s)_name', 'released_year', 'streams']]
In [ ]:
# Bar chart: one bar per release year, bar height = streams of that year's top song.
# The hover tooltip lists the track, artist(s), year and stream count.
yearly_hover_fields = ['track_name', 'artist(s)_name', 'released_year', 'streams']
fig = px.bar(
    clean_data,
    x='released_year',
    y='streams',
    hover_data=yearly_hover_fields,
    title='most streamed songs by year',
)
fig.show(renderer="notebook")
In [ ]:
# Top three songs by stream count for each release year, restricted to 2015 onward.
# Sorting by (year ascending, streams descending) and taking the first three rows of each
# group selects the same rows, in the same order, as groupby().apply(nlargest(3)).
# It avoids apply() operating on the grouping column, which is deprecated in pandas 2.2:
# once the grouping column is excluded from the applied frame, reset_index(drop=True)
# would discard 'released_year' and the column selection below would raise KeyError.
top_songs_by_year = (
    all_data
    .sort_values(['released_year', 'streams'], ascending=[True, False])
    .groupby('released_year')
    .head(3)
    .reset_index(drop=True)
)
top_songs_by_year = top_songs_by_year[['track_name', 'artist(s)_name', 'released_year', 'streams']]
top_songs_by_year = top_songs_by_year.query("""released_year >= 2015""")
# NOTE(review): custom_colors is defined here but is not passed to the figure below.
custom_colors = ['gray', 'white', 'gray']
# The colour value is each song's 0-based position within its year (0 = most streamed),
# which relies on the rows already being ordered by streams, descending, within each year.
fig = px.bar(
    top_songs_by_year,
    x='released_year',
    y='streams',
    color=top_songs_by_year.groupby('released_year').cumcount(),
    hover_data=['track_name', 'artist(s)_name'],
    text='track_name',
    title='most 3 streamed songs',
)
fig.show(renderer="notebook")